import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import numpy as np
from scipy import stats
import sys
sys.path.append(sys.argv[1])

import pandas as pd
import numpy as np
import pickle
from decision_company import read_csv_file, assert_series, logical_or, to_list, fetch_index


# Load the ATP match table from the directory given as the first CLI argument
data_dir = sys.argv[1]
atp_tennis = read_csv_file(os.path.join(data_dir, 'atp_tennis.csv'))

# Build one mask per player column, then keep rows matching either one
# (assert_series with 'equality' presumably compares element-wise; confirm in decision_company)
federer_is_player_1 = assert_series(atp_tennis['Player_1'], 'Federer R.', 'equality')
federer_is_player_2 = assert_series(atp_tennis['Player_2'], 'Federer R.', 'equality')
federer_matches = atp_tennis[logical_or(federer_is_player_1, federer_is_player_2)]

# Fetch the index of the filtered rows and convert it to a list via the helper functions
federer_match_ids_index = fetch_index(federer_matches)
federer_match_ids = to_list(federer_match_ids_index)

print(federer_match_ids)
# pickle.dump(federer_match_ids,open("./ref_result/federer_match_ids.pkl","wb"))



import pandas as pd
import numpy as np
import pickle
from decision_company import read_csv_file, logical_and, logical_or, create_dataframe, concatenate_objects, search_where, extract_unique_values, update_dict, assert_series



# Select every match that lists 'Federer R.' in either player column
played_as_player_1 = assert_series(atp_tennis['Player_1'], 'Federer R.', 'equality')
played_as_player_2 = assert_series(atp_tennis['Player_2'], 'Federer R.', 'equality')
federer_matches = atp_tennis[logical_or(played_as_player_1, played_as_player_2)]

# Work out the opponent of each match from the two player columns, then de-duplicate.
# NOTE(review): search_where presumably keeps Player_1 where the condition holds and
# falls back to Player_2 otherwise (Series.where-like) -- confirm in decision_company.
player_1_is_not_federer = assert_series(federer_matches['Player_1'], 'Federer R.', 'inequality')
opponent_per_match = search_where(federer_matches['Player_1'], player_1_is_not_federer, federer_matches['Player_2'])
federer_opponents = extract_unique_values(opponent_per_match)

# Collect the distinct values of the 'Surface' column across the full dataset
surface_types = extract_unique_values(atp_tennis['Surface'])

# Function to calculate a tennis player's overall win rate, head-to-head record, and performance on different surfaces against a specific opponent.
def calculate_performance(player, opponent, data, surface_types):
    """Summarise how ``player`` fared in matches against ``opponent``.

    Only rows of ``data`` where the two names occupy the 'Player_1' and
    'Player_2' columns (in either order) are considered.

    Args:
        player: Name matched against 'Player_1', 'Player_2' and 'Winner'.
        opponent: Name of the other participant.
        data: Match table indexable by 'Player_1', 'Player_2', 'Winner'
            and 'Surface' (presumably a pandas DataFrame -- confirm).
        surface_types: Iterable of surface labels to break results down by.

    Returns:
        A ``(win_rate, h2h_record, surface_performance)`` tuple. Both
        ``win_rate`` and ``h2h_record`` are the share of the selected
        matches whose 'Winner' equals ``player``; ``surface_performance``
        maps each surface label to the same ratio restricted to that
        surface. Any ratio over zero matches is reported as ``0``.
    """

    def _win_fraction(matches):
        # Count wins first, then guard the division against an empty selection
        victories = len(matches[assert_series(matches['Winner'], player, 'equality')])
        played = len(matches)
        if played > 0:
            return victories / played
        return 0

    # Two mirrored masks cover both possible column orders of the pairing
    player_listed_first = logical_and(
        assert_series(data['Player_1'], player, 'equality'),
        assert_series(data['Player_2'], opponent, 'equality'),
    )
    opponent_listed_first = logical_and(
        assert_series(data['Player_1'], opponent, 'equality'),
        assert_series(data['Player_2'], player, 'equality'),
    )
    meetings = data[logical_or(player_listed_first, opponent_listed_first)]

    # Both figures are derived from the same head-to-head selection
    win_rate = _win_fraction(meetings)
    h2h_record = win_rate

    surface_performance = {
        surface: _win_fraction(meetings[assert_series(meetings['Surface'], surface, 'equality')])
        for surface in surface_types
    }

    return win_rate, h2h_record, surface_performance

# Result table layout: identifying columns, the two ratio columns, then one
# performance column per surface type
columns = ['Player', 'Opponent', 'Win Rate', 'Head-to-Head'] + [f'{surface} Surface Performance' for surface in surface_types]
player_stats = create_dataframe(data=[], columns=columns)

# Evaluate Raonic M. against every opponent Federer has faced
for opponent in federer_opponents:
    # Win rate, head-to-head record, and per-surface performance for this pairing
    win_rate, h2h_record, surface_performance = calculate_performance('Raonic M.', opponent, atp_tennis, surface_types)

    # Assemble one row and append it to the running result table
    row_data = {'Player': 'Raonic M.', 'Opponent': opponent, 'Win Rate': win_rate, 'Head-to-Head': h2h_record}
    update_dict(row_data, {f'{surface} Surface Performance': surface_performance[surface] for surface in surface_types})
    player_stats = concatenate_objects(player_stats, create_dataframe(row_data, index=[0]))

print(player_stats)

# Write the result through a context manager so the file handle is flushed and
# closed deterministically instead of being left to garbage collection
with open("./ref_result/player_stats.pkl", "wb") as result_file:
    pickle.dump(player_stats, result_file)